##############################################################################
#
# 				Replication Code for Illsutrative Example in
#
#	"Strategy and Sample Selection - A Strategic Selection Estimator"
#
#
#							Lucas Leemann, 2014
#	
##############################################################################

rm(list=ls()) 						# clear working space

library(foreign)  					# Packages for data
library(mnormt)						# Packages for MVNormal
library(games)						# Package for data and alternative identification


# Replicate the example in the "games" Package
data(war1800)
f1 <- esc + war ~ s_wt_re1 + revis1 | 0 | regime1 | balanc + regime2
##    ^^^^^^^^^   ^^^^^^^^^^^^^^^^^   ^   ^^^^^^^   ^^^^^^^^^^^^^^^^
##        y              u11         u13    u14           u24

m1 <- egame12(f1, data = war1800)
summary(m1)



# Run with alternative identification (variance=1)


#################################################
# The Code: Srategic Estimator NO Selection
#################################################

struct <- function(x11,x14,x24,y,beta){
	x11 <- cbind(1,as.matrix(x11))
	x14=as.matrix(x14)
	x24=as.matrix(x24)
    x24 <- cbind(1,as.matrix(x24))
	n <- dim(x11)[1]
	n11 <- dim(x11)[2]
	n14 <- dim(x14)[2]
	n24 <- dim(x24)[2]
	b11 <- as.matrix(beta[1:n11])
	b13 <- as.matrix(beta[(n11+1)])
	b14 <- as.matrix(beta[(n11+2):(n11+2+n14-1)])
	b24 <- as.matrix(beta[-c(1:(n11+2+n14-1))])
	u24 <- x24%*%b24
	p2 <- pnorm((u24))
	u11 <- x11%*%b11
	u13 <- b13*rep(1,n)
	u14 <- x14%*%b14
	u12 <- p2*u14 + (1-p2)*u13
	p1 <- pnorm((p2*u14+(1-p2)*u13-u11))
	vec1 <- which(y[,1]==0)
	vec3 <- which(y[,2]==0)
	vec4 <- which(y[,2]==1)
	chunk1 <- log(1-p1)
	chunk3 <- log(p1*(1-p2))
	chunk4 <- log(p1*p2)
	ll <- sum(chunk1[vec1]) + sum(chunk3[vec3]) + sum(chunk4[vec4]) 
	return(-ll)	
	}


# Set up data appropriately

Y <- cbind(war1800$esc, war1800$war)
Y[war1800$esc==0,2] <- 88
dem1 <- rep(NA, length(war1800$regime1))
dem1[war1800$regime1=="dem"] <- 1
dem1[war1800$regime1=="mixed"|war1800$regime1=="autoc"] <- 0
mixed1 <- rep(NA, length(war1800$regime1))
mixed1[war1800$regime1=="mixed"] <- 1
mixed1[war1800$regime1=="autoc"|war1800$regime1=="dem"] <- 0

dem2 <- rep(NA, length(war1800$regime2))
dem2[war1800$regime2=="dem"] <- 1
dem2[war1800$regime2=="mixed"|war1800$regime2=="autoc"] <- 0
mixed2 <- rep(NA, length(war1800$regime2))
mixed2[war1800$regime2=="mixed"] <- 1
mixed2[war1800$regime2=="autoc"|war1800$regime2=="dem"] <- 0

startval <- rep(0.1,10)
startvalues2 <- glm(war1800$war ~  war1800$balanc + dem2 + mixed2 ,family=binomial(link="probit"))$coefficient
startval[7:10] <- startvalues2 

dataL <- data.frame(cbind(Y[,1],Y[,2], dem1, mixed1, dem2, mixed2, war1800$s_wt_re1, war1800$revis1, war1800$balanc))

colnames(dataL) <- c("esc", "war", "dem1", "mixed1", "dem2", "mixed2", "s_wt_re1", "revis1", "balanc")
dataL <- na.omit(dataL)
dataL$war[dataL$war==88] <- NA 

out.struc <- optim(startval, struct, x11=cbind(dataL$s_wt_re1, dataL$revis1),
x14=cbind(dataL$dem1, dataL$mixed1), x24=cbind(dataL$balanc, dataL$dem2, dataL$mixed2), y=cbind(dataL$esc, dataL$war), hessian=TRUE, method="BFGS", control=list(trace=5, REPORT=4))

# create table with estimation results

parameters.struc <- out.struc$par
ses.struc <- sqrt(diag(try(solve(out.struc$hessian))))
zz <- parameters.struc/ses.struc
pp <- 2*(1-pnorm(abs(zz)))
cbind(parameters.struc,ses.struc,zz,pp)

#mtest <- glm(war[esc==1] ~ balanc[esc==1] + regime2[esc==1],  family=binomial(link="probit"), data=war1800)
#summary(mtest)


# Now, run same example with strategic selection estimator

#################################################
# The Code: Strategy A N D Selection  Estimator
#################################################


selstruc <- function(x11,x14,x24,y,beta){
	x11 <- cbind(1,as.matrix(x11))
	x14=as.matrix(x14)
	x24=as.matrix(x24)
    x24 <- cbind(1,as.matrix(x24))
	n <- dim(x11)[1]
	n11 <- dim(x11)[2]
	n14 <- dim(x14)[2]
	n24 <- dim(x24)[2]
	b11 <- as.matrix(beta[1:n11])
	b13 <- as.matrix(beta[(n11+1)])
	b14 <- as.matrix(beta[(n11+2):(n11+2+n14-1)])
	b24 <- as.matrix(beta[(n11+2+n14):(n11+2+n14+n24-1)])
	para <- as.matrix(beta[-c(1:(n11+2+n14+n24-1))])
	rho <- 2*(1/(1+exp(-para)))-1
	xb2.sp <- x24%*%b24												# latent D.utility for player 2 for a_4
	p.p <- pnorm(xb2.sp/(sqrt(1)))									# p(Y4=1|Y1=0)
	u1.34 <- -x11%*%b11 + p.p*(x14%*%b14) + (1-p.p)*rep(b13,n)		# latent D.utility for player 1 for a_2
	part1 <- cbind(-xb2.sp,u1.34); part2 <- cbind(xb2.sp,u1.34)		# combining for mvnorm
	covar1 <- matrix(c(1,-rho,-rho,1),2,2)				# negative rho
	covar2 <- matrix(c(1,rho,rho,1),2,2)				# positive rho
	prob1 <- apply(part1,1,pmnorm, mean=rep(0,2),varcov=covar1)	# y_1=1; y_2=0
	prob2 <- apply(part2,1,pmnorm, mean=rep(0,2),varcov=covar2)	# y_1=1; y_2=1
	prob3 <- 1-pnorm(u1.34)										# y_1=0; y_2=?
	Y <- y
	vec3 <- which(Y==1); vec2 <- which(Y==4); vec1 <- which(Y==3) # which obs belong where?
	chunk1 <- sum(log(prob1[vec1]))								# for each y group, the 
	chunk2 <- sum(log(prob2[vec2]))								# partial LL contribution
	chunk3 <- sum(log(prob3[vec3]))								# it makes
	ll <- chunk1 + chunk2 + chunk3 
	return(-ll)
	}
	
Y <- rep(0,length(dataL$war))
Y[dataL$esc==0] <- 1
Y[dataL$esc==1&dataL$war==0] <- 3
Y[dataL$esc==1&dataL$war==1] <- 4

y <- Y
startval <- rep(0.1,10)
startval <- c(startval,1)
out.selStruc <- optim(startval, selstruc, x11=cbind(dataL$s_wt_re1, dataL$revis1),
x14=cbind(dataL$dem1, dataL$mixed1), x24=cbind(dataL$balanc, dataL$dem2, dataL$mixed2), y=Y, hessian=TRUE, 	method="BFGS", control=list(trace=5, REPORT=4))

# set up table for estimation results

parameters.selStruc <- out.selStruc$par
ses.selStruc <- sqrt(diag(try(solve(out.selStruc$hessian))))
zz <- parameters.selStruc/ses.selStruc
pp <- 2*(1-pnorm(abs(zz)))
outtable.selStruc <- cbind(parameters.selStruc,ses.selStruc,zz,pp)
# attention: rho has to be transformed (Note: Variance estimate will be corrected later)
rho <- 2*(1/(1+exp(-out.selStruc$par[11])))-1

# compare to non-selection:
parameters.struc <- out.struc$par
ses.struc <- sqrt(diag(try(solve(out.struc$hessian))))
zz <- parameters.struc/ses.struc
pp <- 2*(1-pnorm(abs(zz)))
outtable.struc <- cbind(parameters.struc,ses.struc,zz,pp)


######################################################################
#
#	CORRECTLY PREDICTED OUTCOMES
#
######################################################################


outcome.pr <- predict(m1, type="outcome")
outcome.egame <- rep(NA,dim(outcome.pr)[1])

for (i in 1:length(outcome.egame)){
	ifelse(outcome.pr[i,1]>outcome.pr[i,2] & outcome.pr[i,1]>outcome.pr[i,3], outcome.egame[i] <- 1, ifelse(outcome.pr[i,2]>outcome.pr[i,3] & outcome.pr[i,2]>outcome.pr[i,1], outcome.egame[i] <- 2, outcome.egame[i] <- 3))
}
real.outcomes <- rep(NA,dim(outcome.pr)[1])

for (i in 1:length(outcome.egame)){
	ifelse(dataL$esc[i]==0, real.outcomes[i]<- 1, ifelse(dataL$war[i]==0, real.outcomes[i]<- 2, real.outcomes[i]<- 3))
	}
	
outcome.selstruc <- rep(NA,dim(outcome.pr)[1])

beta <- out.selStruc$par
x11 <- cbind(1,dataL$s_wt_re1, dataL$revis1)
x14 <- cbind(dataL$dem1, dataL$mixed1)
x24 <- cbind(1,dataL$balanc, dataL$dem2, dataL$mixed2)		

n <- dim(x11)[1]
b11 <- beta[1:3]
b13 <- beta[4]
b14 <- beta[5:6]
b24 <- beta[7:10]
para <- beta[11]
rho <- 2*(1/(1+exp(-para)))-1

xb2.sp <- x24%*%b24				# latent D.utility for player 2 for a_4
p.p <- pnorm(xb2.sp/(sqrt(1)))	# p(Y4=1|Y1=0)
u1.34 <- -x11%*%b11 + p.p*(x14%*%b14) + (1-p.p)*rep(b13,n)	# latent D.utility for player 1 for a_2
part1 <- cbind(-xb2.sp,u1.34); part2 <- cbind(xb2.sp,u1.34)		# combining for mvnorm
covar1 <- matrix(c(1,-rho,-rho,1),2,2)				# negative rho
covar2 <- matrix(c(1,rho,rho,1),2,2)	
	
	
prob1 <- apply(part1,1,pmnorm, mean=rep(0,2),varcov=covar1)	# y_1=1; y_2=0
prob2 <- apply(part2,1,pmnorm, mean=rep(0,2),varcov=covar2)	# y_1=1; y_2=1
prob3 <- 1-pnorm(u1.34)	
	
out.prob <- cbind(prob3, prob1, prob2)

outcome.selstruc <- rep(NA,dim(outcome.pr)[1])

for (i in 1:length(outcome.egame)){
	ifelse(out.prob[i,1]>out.prob[i,2] & out.prob[i,1]>out.prob[i,3], outcome.selstruc[i] <- 1, ifelse(out.prob[i,2]>out.prob[i,3] & out.prob[i,2]>out.prob[i,1], outcome.selstruc[i] <- 2, outcome.selstruc[i] <- 3))
}

table(outcome.selstruc, real.outcomes)
sum(diag(table(outcome.selstruc, real.outcomes)))/282

table(outcome.egame, real.outcomes)
sum(diag(table(outcome.egame, real.outcomes)))/282


####################################################################################
#
# TABLE & COEFPLOT
#
####################################################################################


# output table
library(memisc)
outii <- matrix(NA,22,3)
#round(outtable.struc, 3)[,c(1:2)]
une <- c(1,3,5,7,9,11,13,15,17,19,21)	
eve <- c(2,4,6,8,10,12,14,16,18,20,22)	
outii[une,2] <- c(round(outtable.struc, 3)[,1],888)
outii[eve,2] <- c(round(outtable.struc, 3)[,2],888)	
outii[une,1]<- c(round(m1$coefficient,3),888)	
outii[eve,1]<- c(round(sqrt(diag(m1$vcov)),3),888)	
outii[une,3] <- round(outtable.selStruc, 3)[,1]
outii[eve,3] <- round(outtable.selStruc, 3)[,2]		
	

NNN <- c(282, 282, 282)
llike <- c(out.struc$value, out.struc$value, out.selStruc$value)
outii <- rbind(outii,NNN, llike)

# Corrections for transformed rho
rho.draw <- rnorm(100000, out.selStruc$par[11],ses.selStruc[11])
rho.sim <- 2*(1/(1+exp(-rho.draw)))-1

outii[21,3] <- median(rho.sim)
outii[22,3] <- sd(rho.sim)

toLatex(outii,digits=3)

	

############################################################
#
# 	C O E F P L O T 
#
############################################################

par(bty="n", oma=c(0,0,0,0), mar=c(3,7,0.4,0.4))
#
coef <- rep(0,11)
se.coef <- rep(0,11)
for (i in 1:11){
	coef[i]<-2*i-1
}
for (i in 1:11){
	se.coef[i]<-2*(i-1)+2
}

rho <- outii[21,3]
se.rho <- outii[22,3]
  
beta.2 <- rev(outii[coef,2])
se.beta2 <- rev(outii[se.coef,2])
#
y.beta <- c(1:11)+0.1

#
beta2.x0 <- rep(0,11)
beta2.x1 <- rep(0,11)
for (i in 2:11){
		beta2.x0[i] <- beta.2[i]-1.96*se.beta2[i]
		beta2.x1[i] <- beta.2[i]+1.96*se.beta2[i]
		}
beta2.x0sh <- rep(NA,11)
beta2.x1sh <- rep(NA,11)
for (i in 2:11){
		beta2.x0sh[i] <- beta.2[i]-1.64*se.beta2[i]
		beta2.x1sh[i] <- beta.2[i]+1.64*se.beta2[i]
		}
beta2.x0 <- rev(beta2.x0)
beta2.x1 <- rev(beta2.x1)
beta2.x0sh <- rev(beta2.x0sh)
beta2.x1sh <- rev(beta2.x1sh)
beta2.y0 <- c(11:1)+.1
beta2.y1 <- c(11:1)+.1

plot(beta.2,y.beta, xlim=c(-2.5,2.5), ylim=c(0.5,11.5),col="grey75", pch=16, ylab="", xlab="", yaxt="n", cex.axis=0.7)
rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col=rgb(50,50,50,5,maxColorValue=255), border=NA)
segments(beta2.x0,beta2.y0,beta2.x1,beta2.y1,col="grey75", lwd=1.0)
segments(beta2.x0sh,beta2.y0,beta2.x1sh,beta2.y1,col="grey75", lwd=1.99)
abline(v=0, lwd=1.5, col="grey")
abline(h=1.6, lwd=1, col="gray", lty=2)
beta.2 <- rev(outii[coef,3])
beta.2[1] <- median(rho.sim) 
se.beta2 <- rev(outii[se.coef,3])
y.beta <- c(1:11)-0.1

#
beta2.x0 <- rep(0,11)
beta2.x1 <- rep(0,11)
for (i in 2:11){
		beta2.x0[i] <- beta.2[i]-1.96*se.beta2[i]
		beta2.x1[i] <- beta.2[i]+1.96*se.beta2[i]
		}
beta2.x0sh <- rep(0,11)
beta2.x1sh <- rep(0,11)
for (i in 2:11){
		beta2.x0sh[i] <- beta.2[i]-1.64*se.beta2[i]
		beta2.x1sh[i] <- beta.2[i]+1.64*se.beta2[i]
		}
beta2.x0 <- rev(beta2.x0)
beta2.x1 <- rev(beta2.x1)
beta2.x0sh <- rev(beta2.x0sh)
beta2.x1sh <- rev(beta2.x1sh)

beta2.y0 <- c(11:1)-.1
beta2.y1 <- c(11:1)-.1

y.beta
points(beta.2,y.beta, col="black",pch=17)
segments(beta2.x0[-11],beta2.y0[-11],beta2.x1[-11],beta2.y1[-11] , col="black")
segments(beta2.x0sh[-11],beta2.y0[-11],beta2.x1sh[-11],beta2.y1[-11],col="black", lwd=1.99)
# for rho se
segments(sort(rho.sim)[2500], 1-.1, sort(rho.sim)[97500], 1-.1, col="black", lwd=1.0)
segments(sort(rho.sim)[5000], 1-0.1, sort(rho.sim)[95000], 1-.1, col="black", lwd=1.99)


names.beta <- c(expression(paste("U"[11],": Intercept")), expression(paste("U"[11],": S-score")),  expression(paste("U"[11],": Revisionist")),"Aims",  expression(paste("U"[14],": Intercept")),  expression(paste("U"[14],": Democratic")), " Regime",  expression(paste("U"[14],": Mixed")), "Regime", expression(paste("U"[24],": Intercept")),  expression(paste("U"[24],": Balance")),  expression(paste("U"[24],": Democratic")), "Regime", expression(paste("U"[24],": Mixed")), "Regime", expression(paste("Cor(",epsilon[1],",",epsilon[2],")")))


x.names <- c(11:1)
deltNA <- 0.2
x.names <- c(x.names[1:2],9+deltNA,9-deltNA,x.names[4:11])
x.names <- c(x.names[1:5],7+deltNA,7-deltNA,x.names[7:12])
x.names <- c(x.names[1:7],6+deltNA,6-deltNA,x.names[9:13])
x.names <- c(x.names[1:11],3+deltNA,3-deltNA,x.names[13:14])
x.names <- c(x.names[1:13],2+deltNA,2-deltNA,x.names[15])

text(-2.8, x.names, names.beta, xpd = TRUE, cex = 1.0, adj=1)    


legend(1.5,10,pch=c(16,17), col=c("gray75","black"),legend=c("Strat", "SSa"), cex=0.8, bty="n")
xx <- 1.66
yy <- 8.8

text(1.5,1.1,"CPC = 50.0%", col="black", cex=1.0)
text(1.5,0.75,"CPC = 47.9%", col="gray60", cex=1.0)
text(1.28,0.4,"N = 282", cex=1.0)



